org 100h
BASECOLOR equ 10

  mov  al,12h   ; mode 640x480x16
  int  10h
  push 0A000h
  pop  es
  fninit

M:
  mov  bp,8
U:mov  cx,16
V:
E:mov  dx,3CEh    ; GC index
  mov  ax,0x0000  ; al=0 (GC set/reset), ah=color
COL equ $-1
  out  dx,ax
  mov  ax,0x0305  ; al=5 (GC mode), ah=read mode 0, write mode 3
  out  dx,ax
  
  call S   ; generate point(cx,bp) on sphere, store to (bx,di)
  mov  si,bx
  xchg ax,di

  inc  bp
  call S
  dec  bp
  call L   ; draw line from (bx>>6,di>>6) to (si>>6,ax>>6)

  inc  cx
  call S
  dec  cx
  call L

  add  dword[T],44
  xor  byte[COL],BASECOLOR
  jnz  E
  sub  dword[T],88

  loop V

  dec  bp
  jnz  U
  
  add  dword[T],44

  mov  dx,3DAh
W:in   al,dx
  and  al,8
  jz   W

  in   al,60h
  cmp  al,1
  jnz  M
  
  ret
  
; generate point(cx,bp) on sphere, store to bx di

             ;     sp+14 12 10  8  6                  4  2  0
S:pusha      ; pusha: ax cx dx bx sp(original value) bp si di
  mov  bx,sp
  fild  word[bx+12] ; a           ; saved cx
  fmul  dword[RR]
  fsincos           ; ca sa
  fld   st0         ; ca ca sa
  fild  word[bx+4]  ; B ca ca sa  ; saved bp
  fmul  dword[RR]
  fiadd word[T]     ; b=B+t
  fsincos           ; cb sb ca ca sa
  fmulp st2,st0     ; sb ca*cb ca sa
  fmulp st2,st0     ; z=ca*cb y=ca*sb x=sa
             ; z y x
  fld   st0  ; z z y x
  fld   st3  ; x z z y x
  fild  dword[T]
  fcos
  fisub dword[T]
  fsincos       ; c s x z z y x
  fmul  st2,st0 ; c s cx z z y x
  fmulp st3,st0 ; s cx cz z y x
  fmul  st3,st0 ; s cx cz sz y x
  fmulp st5,st0 ; cx cz sz y sx
  faddp st2,st0 ; cz cx+sz y sx
  fsubp st3,st0 ; cx+sz y -sx+cx
  
  fiadd word[DEPTH]  ; Z=ca*cb+D y x
  fidivr word[ZOOM]  ; zoom/Z y x
  fmul  st1,st0    
  fmulp st2,st0    ; y/Z*zoom x/Z*zoom
  fistp word[bx+8] ; saved bx
  fistp word[bx]   ; saved di
  popa
  ret
  
; draw line from (bx>>S,di>>S) to (si>>S,ax>>S)
L:pusha

;  sub  ax,di  ;dy
;  sub  si,bx  ;dx
;  mov  cl,1  ; assume ch=0
;H:sar  si,1
;  sar  ax,1
;  add  cl,cl ; number of iters
;  imul bp,si,127; shift again if abs(bp) or abs(si) > 512
;  jo   H
;  imul bp,ax,127
;  jo   H

  sub  ax,di  ;=deltay
  mov  cx,ax
  cwd
  xor  cx,dx  ; cx = abs(deltay)
  sub  si,bx  ;=deltax
  jns  I
  sub  cx,si
  db   0x3d   ; cmp ax,NN: 1-byte skip
I:add  cx,si  ; cx = abs(deltay)+abs(deltax)
  shr  cx,7
  inc  cx     ; cx = steps = 1 + (abs(deltay)+abs(deltax)) / n
  idiv cx
  xchg ax,si
  cwd
  idiv cx

X:pusha  ; bx=x di=y si=Dx ax=Dy (10.6)
  mov  cx,0x8006
  sar  bx,cl
  sar  di,cl
  mov  cl,bl      ; cl = x
  sar  bx,3 
  imul di,80      ; offset = y*(w/8) + x/8
  ror  ch,cl      ; 0x80>>(x%8)
  xchg byte[es:bx+di+(240*640 + 320)/8],ch; load to latch, write
; 'bts' would be ideal, but it does 16-bit access (latches are only 8-bit)
F:popa
  add  bx,ax ; advance (I)
  add  di,si
;  add  bx,si ; advance (H)
;  add  di,ax
N:loop X
  popa
  ret

ZOOM  dw 20000
DEPTH dw 2
RR dd 0.392699081698724154807830422909938 ; 2*pi/16

section .bss
T resd 1
